Pitcher Precision

Sasank Vishnubhatla

4/17/2019

Last Update: 2019-05-12 16:30:20

Libraries

Let’s load some libraries in first.

library(baseballr)
library(pitchRx)
library(tidyverse)

Let’s also clear out the environment.

rm(list = ls())

With these libraries, we can get our data as well as visualize it. Let’s take a look at some players to see what we can look at.

Data Loading

Here is the list of players I will be looking at.

Let’s now scrape the data for each player.

# Download pitch-level Statcast data for one pitcher via
# scrape_statcast_savant().
#
# Args:
#   start: first date of the query window, as a "YYYY-MM-DD" string.
#   id:    player id, forwarded as `playerid` (player_type is "pitcher").
#   end:   last date of the query window, as a "YYYY-MM-DD" string. Defaults
#          to today's date, which is the value this function always used
#          before the parameter existed, so existing two-argument calls
#          behave exactly as before.
#
# Returns:
#   Whatever scrape_statcast_savant() returns for that window.
scrape.data <- function(start, id, end = format(Sys.time(), "%Y-%m-%d")) {
    # An explicit `end` lets a caller bound a pull to a single season instead
    # of always running the window up to the day the report is knitted.
    scrape_statcast_savant(start_date = start,
                           end_date = end,
                           playerid = id,
                           player_type = "pitcher")
}

start = "2019-01-01"

syndergaard.data = scrape.data(start, 592789)
corbin.data = scrape.data(start, 571578)
vazquez.data = scrape.data(start, 553878)
stroman.data = scrape.data(start, 573186)
verlander.data = scrape.data(start, 434378)

Now with our data, let’s get the information we want out of it.

# Reduce a raw Statcast pull to the columns this report works with.
#
# Args:
#   data: data frame holding the Statcast columns read below (player_name,
#         pitch_type, type, game_date, events, description, plate_x, plate_z,
#         pfx_x, pfx_z, effective_speed, release_spin_rate, launch_speed,
#         launch_angle, launch_speed_angle).
#
# Returns:
#   A data.frame with the renamed columns name, pitch, outcome, date, event,
#   descrip, xcoord, ycoord, xmove, ymove, velo, spin, exvelo, exang, contact
#   and year (the first four characters of game_date). NA values in exvelo,
#   exang and contact are replaced by 0.
filter.data <- function(data) {
    game.dates <- pull(data, game_date)
    filtered <- data.frame(name = pull(data, player_name),
                           pitch = pull(data, pitch_type),
                           outcome = pull(data, type),
                           date = game.dates,
                           event = pull(data, events),
                           descrip = pull(data, description),
                           xcoord = pull(data, plate_x),
                           ycoord = pull(data, plate_z),
                           xmove = pull(data, pfx_x),
                           ymove = pull(data, pfx_z),
                           velo = pull(data, effective_speed),
                           spin = pull(data, release_spin_rate),
                           exvelo = pull(data, launch_speed),
                           exang = pull(data, launch_angle),
                           contact = pull(data, launch_speed_angle),
                           year = substring(game.dates, 0, 4))
    # The three launch-related columns get 0 wherever the source value is NA.
    for (column in c("exvelo", "exang", "contact")) {
        absent <- is.na(filtered[[column]])
        filtered[[column]][absent] <- 0
    }
    filtered
}

syndergaard = filter.data(syndergaard.data)
corbin = filter.data(corbin.data)
stroman = filter.data(stroman.data)
vazquez = filter.data(vazquez.data)
verlander = filter.data(verlander.data)

With this filtered data, we have selected the following columns:

Visualization

Let’s start visualizing some of this data. Before that, let me define a strikezone. This strikezone was taken from the website Baseball with R

# Strike-zone edges used by the location plots: top/bottom are vertical
# bounds, in/out are horizontal bounds.
topKzone <- 3.5
botKzone <- 1.6
inKzone <- -0.95
outKzone <- 0.95

# Corners of the zone as (x, y) rows, walked around the rectangle and ending
# back on the starting corner so geom_path() draws a closed outline.
kZone.corners <- rbind(c(inKzone, botKzone),
                       c(inKzone, topKzone),
                       c(outKzone, topKzone),
                       c(outKzone, botKzone),
                       c(inKzone, botKzone))
kZone <- data.frame(x = kZone.corners[, 1],
                    y = kZone.corners[, 2])

Location via Outcome

Let’s look at pitch location by pitch outcome. In the data, X means the ball was hit into play, B is a ball, and S is any type of strike.

# Plot where a pitcher's pitches crossed the plate, colored by pitch outcome.
#
# Args:
#   player: data frame in the shape produced by filter.data(); the xcoord,
#           ycoord, outcome, name and year columns are read.
#
# Returns:
#   A ggplot object: jittered points, one per pitch, with the kZone outline
#   drawn on top. The title is built from the first row's name and year.
graph.pitch.heatmap.out <- function(player) {
    title <- paste(player$name[1], player$year[1], "Outcome", sep = " ")
    # Map bare column names so ggplot2 looks them up in the layer's data;
    # mapping `player$...` vectors ties the aesthetics to an outside object,
    # which ggplot2 discourages.
    ggplot(player) +
        geom_jitter(aes(x = xcoord, y = ycoord, color = outcome)) +
        xlab("Horizontal Position") +
        ylab("Vertical Position") +
        ggtitle(title) +
        labs(color = "Pitch Outcome") +
        theme_minimal() +
        geom_path(aes(x, y), data = kZone)
}

Patrick Corbin

corbin.heatmap.out = graph.pitch.heatmap.out(corbin)
corbin.heatmap.out

Marcus Stroman

stroman.heatmap.out = graph.pitch.heatmap.out(stroman)
stroman.heatmap.out

Noah Syndergaard

syndergaard.heatmap.out = graph.pitch.heatmap.out(syndergaard)
syndergaard.heatmap.out
## Warning: Removed 1 rows containing missing values (geom_point).

Felipe Vazquez

vazquez.heatmap.out = graph.pitch.heatmap.out(vazquez)
vazquez.heatmap.out

Justin Verlander

verlander.heatmap.out = graph.pitch.heatmap.out(verlander)
verlander.heatmap.out
## Warning: Removed 100 rows containing missing values (geom_point).

Location via Type

Let’s look at pitch location via pitch type.

# Plot where a pitcher's pitches crossed the plate, colored by pitch type.
#
# Args:
#   player: data frame in the shape produced by filter.data(); the xcoord,
#           ycoord, pitch, name and year columns are read.
#
# Returns:
#   A ggplot object: jittered points, one per pitch, with the kZone outline
#   drawn on top. The title is built from the first row's name and year.
graph.pitch.heatmap.type <- function(player) {
    title <- paste(player$name[1], player$year[1], "Type", sep = " ")
    # Map bare column names so ggplot2 looks them up in the layer's data;
    # mapping `player$...` vectors ties the aesthetics to an outside object,
    # which ggplot2 discourages.
    ggplot(player) +
        geom_jitter(aes(x = xcoord, y = ycoord, color = pitch)) +
        xlab("Horizontal Position") +
        ylab("Vertical Position") +
        ggtitle(title) +
        labs(color = "Pitch Type") +
        theme_minimal() +
        geom_path(aes(x, y), data = kZone)
}

Patrick Corbin

corbin.heatmap.type = graph.pitch.heatmap.type(corbin)
corbin.heatmap.type

Marcus Stroman

stroman.heatmap.type = graph.pitch.heatmap.type(stroman)
stroman.heatmap.type

Noah Syndergaard

syndergaard.heatmap.type = graph.pitch.heatmap.type(syndergaard)
syndergaard.heatmap.type
## Warning: Removed 1 rows containing missing values (geom_point).

Felipe Vazquez

vazquez.heatmap.type = graph.pitch.heatmap.type(vazquez)
vazquez.heatmap.type

Justin Verlander

verlander.heatmap.type = graph.pitch.heatmap.type(verlander)
verlander.heatmap.type
## Warning: Removed 100 rows containing missing values (geom_point).

Location via Velocity

Let’s look at pitch location via velocity.

# Plot where a pitcher's pitches crossed the plate, colored by velocity on a
# blue (low) to red (high) gradient.
#
# Args:
#   player: data frame in the shape produced by filter.data(); the xcoord,
#           ycoord, velo, name and year columns are read.
#
# Returns:
#   A ggplot object: jittered points, one per pitch, with the kZone outline
#   drawn on top. The title is built from the first row's name and year.
graph.pitch.heatmap.velo <- function(player) {
    title <- paste(player$name[1], player$year[1], "Velocity", sep = " ")
    # Map bare column names so ggplot2 looks them up in the layer's data;
    # mapping `player$...` vectors ties the aesthetics to an outside object,
    # which ggplot2 discourages.
    ggplot(player) +
        geom_jitter(aes(x = xcoord, y = ycoord, color = velo)) +
        xlab("Horizontal Position") +
        ylab("Vertical Position") +
        ggtitle(title) +
        labs(color = "Velocity") +
        scale_color_gradient(low = "blue", high = "red") +
        theme_minimal() +
        geom_path(aes(x, y), data = kZone)
}

Patrick Corbin

corbin.heatmap.velo = graph.pitch.heatmap.velo(corbin)
corbin.heatmap.velo

Marcus Stroman

stroman.heatmap.velo = graph.pitch.heatmap.velo(stroman)
stroman.heatmap.velo

Noah Syndergaard

syndergaard.heatmap.velo = graph.pitch.heatmap.velo(syndergaard)
syndergaard.heatmap.velo
## Warning: Removed 1 rows containing missing values (geom_point).

Felipe Vazquez

vazquez.heatmap.velo = graph.pitch.heatmap.velo(vazquez)
vazquez.heatmap.velo

Justin Verlander

verlander.heatmap.velo = graph.pitch.heatmap.velo(verlander)
verlander.heatmap.velo
## Warning: Removed 100 rows containing missing values (geom_point).

Movement

To view the movement, let’s just determine the average movement for each type of pitch that each player has. First let’s make a few helpful functions for us.

# Box plot of horizontal movement for each pitch type, with the axes flipped
# so the boxes run left to right.
#
# Args:
#   player: data frame in the shape produced by filter.data(); the pitch,
#           xmove, name and year columns are read.
#
# Returns:
#   The ggplot object. It is returned visibly (the function used to end on an
#   assignment, which returns invisibly), so a bare call now prints the chart
#   just like the heatmap helpers do; assigning the result works as before.
graph.pitch.xmovement <- function(player) {
    title <- paste(player$name[1], player$year[1], "Horizontal Movement", sep = " ")
    # Bare column names are resolved inside `player` by ggplot2; mapping
    # `player$...` vectors is discouraged.
    graph <- ggplot(player) +
        geom_boxplot(aes(x = pitch, y = xmove, color = pitch)) +
        coord_flip() +
        labs(color = "Pitch Type") +
        xlab("Pitch Type") + ylab("Horizontal Movement") +
        ggtitle(title) +
        theme_minimal()
    graph
}

# Box plot of vertical movement for each pitch type.
#
# Args:
#   player: data frame in the shape produced by filter.data(); the pitch,
#           ymove, name and year columns are read.
#
# Returns:
#   The ggplot object. It is returned visibly (the function used to end on an
#   assignment, which returns invisibly), so a bare call now prints the chart
#   just like the heatmap helpers do; assigning the result works as before.
graph.pitch.ymovement <- function(player) {
    title <- paste(player$name[1], player$year[1], "Vertical Movement", sep = " ")
    # Bare column names are resolved inside `player` by ggplot2; mapping
    # `player$...` vectors is discouraged.
    graph <- ggplot(player) +
        geom_boxplot(aes(x = pitch, y = ymove, color = pitch)) +
        labs(color = "Pitch Type") +
        xlab("Pitch Type") + ylab("Vertical Movement") +
        ggtitle(title) +
        theme_minimal()
    graph
}

Patrick Corbin

corbin.xmove = graph.pitch.xmovement(corbin)
corbin.ymove = graph.pitch.ymovement(corbin)
corbin.xmove

corbin.ymove

Marcus Stroman

stroman.xmove = graph.pitch.xmovement(stroman)
stroman.ymove = graph.pitch.ymovement(stroman)
stroman.xmove

stroman.ymove

Noah Syndergaard

syndergaard.xmove = graph.pitch.xmovement(syndergaard)
syndergaard.ymove = graph.pitch.ymovement(syndergaard)
syndergaard.xmove
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

syndergaard.ymove
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

Felipe Vazquez

vazquez.xmove = graph.pitch.xmovement(vazquez)
vazquez.ymove = graph.pitch.ymovement(vazquez)
vazquez.xmove

vazquez.ymove

Justin Verlander

verlander.xmove = graph.pitch.xmovement(verlander)
verlander.ymove = graph.pitch.ymovement(verlander)
verlander.xmove
## Warning: Removed 100 rows containing non-finite values (stat_boxplot).

verlander.ymove
## Warning: Removed 100 rows containing non-finite values (stat_boxplot).

Velocity

We need to separate each pitch first by type. Then we can see how the pitch’s velocity changed over time.

# Line chart of velocity against pitch index (row order of `player`), with
# one colored line per pitch type.
#
# Args:
#   player: data frame in the shape produced by filter.data(); the velo,
#           pitch, name and year columns are read.
#
# Returns:
#   The ggplot object, returned visibly so a bare call prints the chart;
#   assigning the result works as before.
graph.pitch.velo <- function(player) {
    title <- paste(player$name[1], player$year[1], "Velocity Chart", sep = " ")
    # seq_along() yields an empty index for an empty data frame, whereas
    # 1:length(x) would produce c(1, 0) and no longer match the row count.
    graph <- ggplot(player) +
        geom_line(aes(x = seq_along(velo), y = velo, color = pitch)) +
        xlab("Pitches Thrown") + ylab("Velocity") + labs(color = "Pitch Type") +
        ggtitle(title) +
        theme_minimal()
    graph
}

Patrick Corbin

corbin.velo = graph.pitch.velo(corbin)
corbin.velo

Marcus Stroman

stroman.velo = graph.pitch.velo(stroman)
stroman.velo

Noah Syndergaard

syndergaard.velo = graph.pitch.velo(syndergaard)
syndergaard.velo
## Warning: Removed 1 rows containing missing values (geom_path).

Felipe Vazquez

vazquez.velo = graph.pitch.velo(vazquez)
vazquez.velo

Justin Verlander

verlander.velo = graph.pitch.velo(verlander)
verlander.velo
## Warning: Removed 100 rows containing missing values (geom_path).

Spin Rate

Let’s create our graphing function.

# Step chart of spin rate against pitch index (row order of `player`), with
# one colored line per pitch type.
#
# Args:
#   player: data frame in the shape produced by filter.data(); the spin,
#           pitch, name and year columns are read.
#
# Returns:
#   The ggplot object, returned visibly so a bare call prints the chart;
#   assigning the result works as before.
graph.pitch.spin <- function(player) {
    title <- paste(player$name[1], player$year[1], "Spin Rate Chart", sep = " ")
    # seq_along() yields an empty index for an empty data frame, whereas
    # 1:length(x) would produce c(1, 0) and no longer match the row count.
    graph <- ggplot(player) +
        geom_step(aes(x = seq_along(spin), y = spin, color = pitch),
                  direction = "vh") +
        xlab("Pitches Thrown") + ylab("Spin Rate") + labs(color = "Pitch Type") +
        ggtitle(title) +
        theme_minimal()
    graph
}

Patrick Corbin

corbin.spin = graph.pitch.spin(corbin)
corbin.spin

Marcus Stroman

stroman.spin = graph.pitch.spin(stroman)
stroman.spin

Noah Syndergaard

syndergaard.spin = graph.pitch.spin(syndergaard)
syndergaard.spin
## Warning: Removed 1 rows containing missing values (geom_path).

Felipe Vazquez

vazquez.spin = graph.pitch.spin(vazquez)
vazquez.spin

Justin Verlander

verlander.spin = graph.pitch.spin(verlander)
verlander.spin
## Warning: Removed 100 rows containing missing values (geom_path).

Analysis

I’ll be looking at a few specific Pittsburgh Pirates pitchers and looking at them from year to year.

Jameson Taillon

Data Acquisition

Let’s first read in our data for Taillon.

taillon.data.2018 = scrape.data("2018-01-01", 592791)
## 2018-01-01 is not a date. Attempting to coerce...
## https://baseballsavant.mlb.com/statcast_search/csv?all=true&hfPT=&hfAB=&hfBBT=&hfPR=&hfZ=&stadium=&hfBBL=&hfNewZones=&hfGT=R%7CPO%7CS%7C&hfC&hfSea=2018%7C&hfSit=&hfOuts=&opponent=&pitcher_throws=&batter_stands=&hfSA=&player_type=pitcher&hfInfield=&team=&position=&hfOutfield=&hfRO=&home_road=&pitchers_lookup%5B%5D=592791&game_date_gt=2018-01-01&game_date_lt=2019-05-12&hfFlag=&hfPull=&metric_1=&hfInn=&min_pitches=0&min_results=0&group_by=name&sort_col=pitches&player_event_sort=h_launch_speed&sort_order=desc&min_abs=0&type=details
## These data are from BaseballSevant and are property of MLB Advanced Media, L.P. All rights reserved.
## Grabbing data, this may take a minute...
## URL read and payload acquired successfully.
taillon.data.2019 = scrape.data("2019-01-01", 592791)
## 2019-01-01 is not a date. Attempting to coerce...
## https://baseballsavant.mlb.com/statcast_search/csv?all=true&hfPT=&hfAB=&hfBBT=&hfPR=&hfZ=&stadium=&hfBBL=&hfNewZones=&hfGT=R%7CPO%7CS%7C&hfC&hfSea=2019%7C&hfSit=&hfOuts=&opponent=&pitcher_throws=&batter_stands=&hfSA=&player_type=pitcher&hfInfield=&team=&position=&hfOutfield=&hfRO=&home_road=&pitchers_lookup%5B%5D=592791&game_date_gt=2019-01-01&game_date_lt=2019-05-12&hfFlag=&hfPull=&metric_1=&hfInn=&min_pitches=0&min_results=0&group_by=name&sort_col=pitches&player_event_sort=h_launch_speed&sort_order=desc&min_abs=0&type=details
## These data are from BaseballSevant and are property of MLB Advanced Media, L.P. All rights reserved.
## Grabbing data, this may take a minute...
## URL read and payload acquired successfully.
taillon.2018 = filter.data(taillon.data.2018)
taillon.2019 = filter.data(taillon.data.2019)

2018

Now, let’s just get some averages of Taillon’s pitches for 2018.

# Split Taillon's 2018 pitches into one data frame per pitch-type code.
taillon.ff.2018 = taillon.2018[taillon.2018$pitch == "FF",]
taillon.ft.2018 = taillon.2018[taillon.2018$pitch == "FT",]
taillon.sl.2018 = taillon.2018[taillon.2018$pitch == "SL",]
taillon.cu.2018 = taillon.2018[taillon.2018$pitch == "CU",]
taillon.ch.2018 = taillon.2018[taillon.2018$pitch == "CH",]

# Keep only the rows that have no NA in any column.
# NOTE(review): complete.cases() checks every column, not just velo/spin.
# The 2018 tables in this report list 1050 four-seam pitches by count, but
# the four-seam barrel rate (7 barrels at 0.0278884) implies only about 251
# rows survive this step. Presumably `event` is NA on pitches that do not end
# a plate appearance -- confirm, and confirm that the per-pitch averages and
# barrel rates are meant to be computed on that reduced set.
taillon.ff.2018 = taillon.ff.2018[complete.cases(taillon.ff.2018),]
taillon.ft.2018 = taillon.ft.2018[complete.cases(taillon.ft.2018),]
taillon.sl.2018 = taillon.sl.2018[complete.cases(taillon.sl.2018),]
taillon.cu.2018 = taillon.cu.2018[complete.cases(taillon.cu.2018),]
taillon.ch.2018 = taillon.ch.2018[complete.cases(taillon.ch.2018),]
Pitch Average Velocity Standard Deviation of Velocity Average Spin Rate Standard Deviation of Spin Rate
4-Seam Fastball 95.5566494 1.0051742 2354.0876494 79.8169925
2-Seam Fastball 95.3531615 1.0941209 2220.21875 82.8413235
Slider 90.0255882 1.5517429 2411.0294118 100.1901994
Curveball 81.6802556 1.2584525 2640.5263158 191.7004283
Changeup 87.4685556 1.5076367 1688.5925926 145.5985681

Now let’s make some graphs.

taillon.heatmap.out.2018 = graph.pitch.heatmap.out(taillon.2018)
taillon.heatmap.out.2018

taillon.heatmap.type.2018 = graph.pitch.heatmap.type(taillon.2018)
taillon.heatmap.type.2018

taillon.heatmap.velo.2018 = graph.pitch.heatmap.velo(taillon.2018)
taillon.heatmap.velo.2018

taillon.spin.2018 = graph.pitch.spin(taillon.2018)
taillon.spin.2018

taillon.velo.2018 = graph.pitch.velo(taillon.2018)
taillon.velo.2018

taillon.xmove.2018 = graph.pitch.xmovement(taillon.2018)
taillon.xmove.2018

taillon.ymove.2018 = graph.pitch.ymovement(taillon.2018)
taillon.ymove.2018

What’s also important is to determine how many of his pitches were barrelled (strong contact).

# Count the rows of `player` whose contact code equals 6 (the code this
# report treats as a barrel).
#
# Args:
#   player: data frame with a `contact` column.
#
# Returns:
#   Integer count; rows where contact is NA are not counted.
count.barrels <- function(player) {
    is.barrel <- player$contact == 6
    # which() keeps only the TRUE positions, so NA comparisons drop out.
    length(which(is.barrel))
}

# Share of the rows in `player` that count.barrels() counts as barrels.
#
# Args:
#   player: data frame with a `contact` column.
#
# Returns:
#   count.barrels(player) divided by the number of contact entries, as a
#   double. With zero rows the division is 0 / 0 and the result is NaN.
barrel.probability <- function(player) {
    count.barrels(player) / NROW(player$contact)
}

Now let’s take a look at his barrel probability for all his pitches.

Pitch Number of Barrels Barrel Probability
All 24 0.0081081
4-Seam Fastball 7 0.0278884
2-Seam Fastball 4 0.0208333
Slider 6 0.0352941
Curveball 4 0.0300752
Changeup 2 0.0740741

Let’s also take a look at his pitch frequencies.

# Count the rows of `player` whose pitch code equals `type`.
#
# Args:
#   player: data frame with a `pitch` column.
#   type:   pitch-type code to match, e.g. "FF".
#
# Returns:
#   Integer count; rows where pitch is NA are not counted.
pitch.count <- function(player, type) {
    is.match <- player$pitch == type
    # which() keeps only the TRUE positions, so NA comparisons drop out.
    length(which(is.match))
}

# Share of the rows in `player` whose pitch code equals `type`.
#
# Args:
#   player: data frame with a `pitch` column.
#   type:   pitch-type code to match, e.g. "FF".
#
# Returns:
#   pitch.count(player, type) divided by the number of pitch entries, as a
#   double. With zero rows the division is 0 / 0 and the result is NaN.
pitch.frequency <- function(player, type) {
    pitch.count(player, type) / NROW(player$pitch)
}

Let’s view all his frequencies together in a tabular format.

Pitch Pitch Count Pitch Frequency
4-Seam Fastball 1050 0.3547297
2-Seam Fastball 647 0.2185811
Slider 543 0.1834459
Curveball 583 0.1969595
Changeup 137 0.0462838

2019

Now, let’s just get some averages of Taillon’s pitches for 2019.

taillon.ff.2019 = taillon.2019[taillon.2019$pitch == "FF",]
taillon.ft.2019 = taillon.2019[taillon.2019$pitch == "FT",]
taillon.sl.2019 = taillon.2019[taillon.2019$pitch == "SL",]
taillon.cu.2019 = taillon.2019[taillon.2019$pitch == "CU",]
taillon.ch.2019 = taillon.2019[taillon.2019$pitch == "CH",]

taillon.ff.2019 = taillon.ff.2019[complete.cases(taillon.ff.2019),]
taillon.ft.2019 = taillon.ft.2019[complete.cases(taillon.ft.2019),]
taillon.sl.2019 = taillon.sl.2019[complete.cases(taillon.sl.2019),]
taillon.cu.2019 = taillon.cu.2019[complete.cases(taillon.cu.2019),]
taillon.ch.2019 = taillon.ch.2019[complete.cases(taillon.ch.2019),]
Pitch Average Velocity Standard Deviation of Velocity Average Spin Rate Standard Deviation of Spin Rate
4-Seam Fastball 94.7436286 1.1608357 2320 64.3967482
2-Seam Fastball 95.4377838 0.8237644 2304.5945946 56.1537866
Slider 88.7471961 1.371977 2475.3137255 61.3812643
Curveball 82.0296 1.4732223 2756.88 77.8937952
Changeup 88.2098889 1.3323036 1852.1111111 198.4461164

Now let’s make some graphs.

taillon.heatmap.out.2019 = graph.pitch.heatmap.out(taillon.2019)
taillon.heatmap.out.2019

taillon.heatmap.type.2019 = graph.pitch.heatmap.type(taillon.2019)
taillon.heatmap.type.2019

taillon.heatmap.velo.2019 = graph.pitch.heatmap.velo(taillon.2019)
taillon.heatmap.velo.2019

taillon.spin.2019 = graph.pitch.spin(taillon.2019)
taillon.spin.2019

taillon.velo.2019 = graph.pitch.velo(taillon.2019)
taillon.velo.2019

taillon.xmove.2019 = graph.pitch.xmovement(taillon.2019)
taillon.xmove.2019

taillon.ymove.2019 = graph.pitch.ymovement(taillon.2019)
taillon.ymove.2019

What’s also important is to determine how many of his pitches were barrelled (strong contact).

Now let’s take a look at his barrel probability for all his pitches.

Pitch Number of Barrels Barrel Probability
All 8 0.0144144
4-Seam Fastball 2 0.0571429
2-Seam Fastball 2 0.0540541
Slider 3 0.0588235
Curveball 0 0
Changeup 1 0.1111111

Let’s view all his frequencies together in a tabular format.

Pitch Pitch Count Pitch Frequency
4-Seam Fastball 151 0.2720721
2-Seam Fastball 110 0.1981982
Slider 177 0.3189189
Curveball 87 0.1567568
Changeup 30 0.0540541

Richard Rodriguez

Data Acquisition

rodriguez.data.2018 = scrape.data("2018-01-01", 593144)
## 2018-01-01 is not a date. Attempting to coerce...
## https://baseballsavant.mlb.com/statcast_search/csv?all=true&hfPT=&hfAB=&hfBBT=&hfPR=&hfZ=&stadium=&hfBBL=&hfNewZones=&hfGT=R%7CPO%7CS%7C&hfC&hfSea=2018%7C&hfSit=&hfOuts=&opponent=&pitcher_throws=&batter_stands=&hfSA=&player_type=pitcher&hfInfield=&team=&position=&hfOutfield=&hfRO=&home_road=&pitchers_lookup%5B%5D=593144&game_date_gt=2018-01-01&game_date_lt=2019-05-12&hfFlag=&hfPull=&metric_1=&hfInn=&min_pitches=0&min_results=0&group_by=name&sort_col=pitches&player_event_sort=h_launch_speed&sort_order=desc&min_abs=0&type=details
## These data are from BaseballSevant and are property of MLB Advanced Media, L.P. All rights reserved.
## Grabbing data, this may take a minute...
## URL read and payload acquired successfully.
rodriguez.2018 = filter.data(rodriguez.data.2018)
rodriguez.data.2019 = scrape.data("2019-01-01", 593144)
## 2019-01-01 is not a date. Attempting to coerce...
## https://baseballsavant.mlb.com/statcast_search/csv?all=true&hfPT=&hfAB=&hfBBT=&hfPR=&hfZ=&stadium=&hfBBL=&hfNewZones=&hfGT=R%7CPO%7CS%7C&hfC&hfSea=2019%7C&hfSit=&hfOuts=&opponent=&pitcher_throws=&batter_stands=&hfSA=&player_type=pitcher&hfInfield=&team=&position=&hfOutfield=&hfRO=&home_road=&pitchers_lookup%5B%5D=593144&game_date_gt=2019-01-01&game_date_lt=2019-05-12&hfFlag=&hfPull=&metric_1=&hfInn=&min_pitches=0&min_results=0&group_by=name&sort_col=pitches&player_event_sort=h_launch_speed&sort_order=desc&min_abs=0&type=details
## These data are from BaseballSevant and are property of MLB Advanced Media, L.P. All rights reserved.
## Grabbing data, this may take a minute...
## URL read and payload acquired successfully.
rodriguez.2019 = filter.data(rodriguez.data.2019)

2018

Now, let’s just get some averages of Rodriguez’s pitches.

rodriguez.ff.2018 = rodriguez.2018[rodriguez.2018$pitch == "FF",]
rodriguez.sl.2018 = rodriguez.2018[rodriguez.2018$pitch == "SL",]

rodriguez.ff.2018 = rodriguez.ff.2018[complete.cases(rodriguez.ff.2018),]
rodriguez.sl.2018 = rodriguez.sl.2018[complete.cases(rodriguez.sl.2018),]
Pitch Average Velocity Standard Deviation of Velocity Average Spin Rate Standard Deviation of Spin Rate
4-Seam Fastball 93.209327 0.9683027 2372.8341232 79.3036117
Slider 80.8337846 1.1134172 2136.3538462 108.7684511

Now let’s make some graphs.

rodriguez.heatmap.out.2018 = graph.pitch.heatmap.out(rodriguez.2018)
rodriguez.heatmap.out.2018
## Warning: Removed 2 rows containing missing values (geom_point).

rodriguez.heatmap.type.2018 = graph.pitch.heatmap.type(rodriguez.2018)
rodriguez.heatmap.type.2018
## Warning: Removed 2 rows containing missing values (geom_point).

rodriguez.heatmap.velo.2018 = graph.pitch.heatmap.velo(rodriguez.2018)
rodriguez.heatmap.velo.2018
## Warning: Removed 2 rows containing missing values (geom_point).

rodriguez.spin.2018 = graph.pitch.spin(rodriguez.2018)
rodriguez.spin.2018
## Warning: Removed 2 rows containing missing values (geom_path).

rodriguez.velo.2018 = graph.pitch.velo(rodriguez.2018)
rodriguez.velo.2018
## Warning: Removed 2 rows containing missing values (geom_path).

rodriguez.xmove.2018 = graph.pitch.xmovement(rodriguez.2018)
rodriguez.xmove.2018
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

rodriguez.ymove.2018 = graph.pitch.ymovement(rodriguez.2018)
rodriguez.ymove.2018
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

Now let’s take a look at his barrel probability for all his pitches.

Pitch Number of Barrels Barrel Probability
All 8 0.0070609
4-Seam Fastball 6 0.028436
Slider 2 0.0307692

Let’s view all his frequencies together in a tabular format.

Pitch Pitch Count Pitch Frequency
4-Seam Fastball 849 0.749338
Slider 281 0.2480141

2019

Now, let’s just get some averages of Rodriguez’s pitches.

rodriguez.ff.2019 = rodriguez.2019[rodriguez.2019$pitch == "FF",]
rodriguez.sl.2019 = rodriguez.2019[rodriguez.2019$pitch == "SL",]

rodriguez.ff.2019 = rodriguez.ff.2019[complete.cases(rodriguez.ff.2019),]
rodriguez.sl.2019 = rodriguez.sl.2019[complete.cases(rodriguez.sl.2019),]
Pitch Average Velocity Standard Deviation of Velocity Average Spin Rate Standard Deviation of Spin Rate
4-Seam Fastball 92.8783182 1.0748071 2484.1969697 76.6205877
Slider 80.2401 1.5242642 2397.1 92.3789899

Now let’s make some graphs.

rodriguez.heatmap.out.2019 = graph.pitch.heatmap.out(rodriguez.2019)
rodriguez.heatmap.out.2019

rodriguez.heatmap.type.2019 = graph.pitch.heatmap.type(rodriguez.2019)
rodriguez.heatmap.type.2019

rodriguez.heatmap.velo.2019 = graph.pitch.heatmap.velo(rodriguez.2019)
rodriguez.heatmap.velo.2019

rodriguez.spin.2019 = graph.pitch.spin(rodriguez.2019)
rodriguez.spin.2019

rodriguez.velo.2019 = graph.pitch.velo(rodriguez.2019)
rodriguez.velo.2019

rodriguez.xmove.2019 = graph.pitch.xmovement(rodriguez.2019)
rodriguez.xmove.2019

rodriguez.ymove.2019 = graph.pitch.ymovement(rodriguez.2019)
rodriguez.ymove.2019

Now let’s take a look at his barrel probability for all his pitches.

Pitch Number of Barrels Barrel Probability
All 6 0.017192
4-Seam Fastball 6 0.0909091
Slider 0 0

Let’s view all his frequencies together in a tabular format.

Pitch Pitch Count Pitch Frequency
4-Seam Fastball 296 0.8481375
Slider 47 0.1346705

Chris Archer

Data Acquisition

archer.data.2018 = scrape.data("2018-05-31", 502042)
## 2018-05-31 is not a date. Attempting to coerce...
## https://baseballsavant.mlb.com/statcast_search/csv?all=true&hfPT=&hfAB=&hfBBT=&hfPR=&hfZ=&stadium=&hfBBL=&hfNewZones=&hfGT=R%7CPO%7CS%7C&hfC&hfSea=2018%7C&hfSit=&hfOuts=&opponent=&pitcher_throws=&batter_stands=&hfSA=&player_type=pitcher&hfInfield=&team=&position=&hfOutfield=&hfRO=&home_road=&pitchers_lookup%5B%5D=502042&game_date_gt=2018-05-31&game_date_lt=2019-05-12&hfFlag=&hfPull=&metric_1=&hfInn=&min_pitches=0&min_results=0&group_by=name&sort_col=pitches&player_event_sort=h_launch_speed&sort_order=desc&min_abs=0&type=details
## These data are from BaseballSevant and are property of MLB Advanced Media, L.P. All rights reserved.
## Grabbing data, this may take a minute...
## URL read and payload acquired successfully.
archer.2018 = filter.data(archer.data.2018)

archer.data.2019 = scrape.data("2019-01-01", 502042)
## 2019-01-01 is not a date. Attempting to coerce...
## https://baseballsavant.mlb.com/statcast_search/csv?all=true&hfPT=&hfAB=&hfBBT=&hfPR=&hfZ=&stadium=&hfBBL=&hfNewZones=&hfGT=R%7CPO%7CS%7C&hfC&hfSea=2019%7C&hfSit=&hfOuts=&opponent=&pitcher_throws=&batter_stands=&hfSA=&player_type=pitcher&hfInfield=&team=&position=&hfOutfield=&hfRO=&home_road=&pitchers_lookup%5B%5D=502042&game_date_gt=2019-01-01&game_date_lt=2019-05-12&hfFlag=&hfPull=&metric_1=&hfInn=&min_pitches=0&min_results=0&group_by=name&sort_col=pitches&player_event_sort=h_launch_speed&sort_order=desc&min_abs=0&type=details
## These data are from BaseballSevant and are property of MLB Advanced Media, L.P. All rights reserved.
## Grabbing data, this may take a minute...
## URL read and payload acquired successfully.
archer.2019 = filter.data(archer.data.2019)

2018

Now, let’s just get some averages of Archer’s pitches for 2018.

archer.ff.2018 = archer.2018[archer.2018$pitch == "FF",]
archer.ft.2018 = archer.2018[archer.2018$pitch == "FT",]
archer.sl.2018 = archer.2018[archer.2018$pitch == "SL",]
archer.cu.2018 = archer.2018[archer.2018$pitch == "CU",]
archer.ch.2018 = archer.2018[archer.2018$pitch == "CH",]

archer.ff.2018 = archer.ff.2018[complete.cases(archer.ff.2018),]
archer.ft.2018 = archer.ft.2018[complete.cases(archer.ft.2018),]
archer.sl.2018 = archer.sl.2018[complete.cases(archer.sl.2018),]
archer.cu.2018 = archer.cu.2018[complete.cases(archer.cu.2018),]
archer.ch.2018 = archer.ch.2018[complete.cases(archer.ch.2018),]
Pitch Average Velocity Standard Deviation of Velocity Average Spin Rate Standard Deviation of Spin Rate
4-Seam Fastball 94.5243506 1.0583087 2234.038961 76.8786538
2-Seam Fastball 94.6049245 1.0543612 2214.9811321 80.7752082
Slider 87.3661053 1.4032173 2580.9035088 87.4421143
Curveball 80.8534286 1.3645845 2639.1428571 32.6875541
Changeup 88.1620303 1.043588 1698.3030303 164.3398698

Now let’s make some graphs.

archer.heatmap.out.2018 = graph.pitch.heatmap.out(archer.2018)
archer.heatmap.out.2018
## Warning: Removed 1 rows containing missing values (geom_point).

archer.heatmap.type.2018 = graph.pitch.heatmap.type(archer.2018)
archer.heatmap.type.2018
## Warning: Removed 1 rows containing missing values (geom_point).

archer.heatmap.velo.2018 = graph.pitch.heatmap.velo(archer.2018)
archer.heatmap.velo.2018
## Warning: Removed 1 rows containing missing values (geom_point).

archer.spin.2018 = graph.pitch.spin(archer.2018)
archer.spin.2018
## Warning: Removed 1 rows containing missing values (geom_path).

archer.velo.2018 = graph.pitch.velo(archer.2018)
archer.velo.2018
## Warning: Removed 1 rows containing missing values (geom_path).

archer.xmove.2018 = graph.pitch.xmovement(archer.2018)
archer.xmove.2018
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

archer.ymove.2018 = graph.pitch.ymovement(archer.2018)
archer.ymove.2018
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

What’s also important is to determine how many of his pitches were barrelled (strong contact).

# Count the rows of `player` whose contact code equals 6 (the code this
# report treats as a barrel). Re-declared here for the Archer section; it
# behaves the same as the definition given earlier in the report.
#
# Args:
#   player: data frame with a `contact` column.
#
# Returns:
#   Integer count; rows where contact is NA are not counted.
count.barrels <- function(player) {
    is.barrel <- player$contact == 6
    # which() keeps only the TRUE positions, so NA comparisons drop out.
    length(which(is.barrel))
}

# Share of the rows in `player` that count.barrels() counts as barrels.
# Re-declared here for the Archer section; it behaves the same as the
# definition given earlier in the report.
#
# Args:
#   player: data frame with a `contact` column.
#
# Returns:
#   count.barrels(player) divided by the number of contact entries, as a
#   double. With zero rows the division is 0 / 0 and the result is NaN.
barrel.probability <- function(player) {
    count.barrels(player) / NROW(player$contact)
}

Now let’s take a look at his barrel probability for all his pitches.

Pitch Number of Barrels Barrel Probability
All 12 0.0087977
4-Seam Fastball 3 0.038961
2-Seam Fastball 2 0.0377358
Slider 4 0.0350877
Curveball 0 0
Changeup 1 0.030303

Let’s also take a look at his pitch frequencies.

# Count the rows of `player` whose pitch code equals `type`. Re-declared here
# for the Archer section; it behaves the same as the definition given earlier
# in the report.
#
# Args:
#   player: data frame with a `pitch` column.
#   type:   pitch-type code to match, e.g. "FF".
#
# Returns:
#   Integer count; rows where pitch is NA are not counted.
pitch.count <- function(player, type) {
    is.match <- player$pitch == type
    # which() keeps only the TRUE positions, so NA comparisons drop out.
    length(which(is.match))
}

# Share of the rows in `player` whose pitch code equals `type`. Re-declared
# here for the Archer section; it behaves the same as the definition given
# earlier in the report.
#
# Args:
#   player: data frame with a `pitch` column.
#   type:   pitch-type code to match, e.g. "FF".
#
# Returns:
#   pitch.count(player, type) divided by the number of pitch entries, as a
#   double. With zero rows the division is 0 / 0 and the result is NaN.
pitch.frequency <- function(player, type) {
    pitch.count(player, type) / NROW(player$pitch)
}

Let’s view all his frequencies together in a tabular format.

Pitch Pitch Count Pitch Frequency
4-Seam Fastball 410 0.3005865
2-Seam Fastball 238 0.1744868
Slider 550 0.4032258
Curveball 36 0.026393
Changeup 129 0.0945748

2019

Now, let’s just get some averages of Archer’s pitches for 2019.

# Split Archer's 2019 pitches into one data frame per pitch-type code
# (this season also includes the "FC" code).
archer.ff.2019 = archer.2019[archer.2019$pitch == "FF",]
archer.ft.2019 = archer.2019[archer.2019$pitch == "FT",]
archer.fc.2019 = archer.2019[archer.2019$pitch == "FC",]
archer.sl.2019 = archer.2019[archer.2019$pitch == "SL",]
archer.cu.2019 = archer.2019[archer.2019$pitch == "CU",]
archer.ch.2019 = archer.2019[archer.2019$pitch == "CH",]

# Keep only the rows that have no NA in any column.
# NOTE(review): the 2019 tables in this report list 8 curveballs by count,
# yet the curveball averages come out as NaN/NA and its barrel probability as
# NaN. That suggests every curveball row is removed by complete.cases() here,
# leaving an empty data frame -- confirm whether filtering on all columns is
# intended.
archer.ff.2019 = archer.ff.2019[complete.cases(archer.ff.2019),]
archer.ft.2019 = archer.ft.2019[complete.cases(archer.ft.2019),]
archer.fc.2019 = archer.fc.2019[complete.cases(archer.fc.2019),]
archer.sl.2019 = archer.sl.2019[complete.cases(archer.sl.2019),]
archer.cu.2019 = archer.cu.2019[complete.cases(archer.cu.2019),]
archer.ch.2019 = archer.ch.2019[complete.cases(archer.ch.2019),]
Pitch Average Velocity Standard Deviation of Velocity Average Spin Rate Standard Deviation of Spin Rate
4-Seam Fastball 92.4679667 1.5239474 2226.0666667 87.0441558
2-Seam Fastball 92.3679524 1.0896483 2215.5238095 100.7053221
Cut Fastball 86.619 1.2520719 2233.3333333 635.262413
Slider 85.7914324 1.2343418 2555.6486486 118.6238725
Curveball NaN NA NaN NA
Changeup 86.887375 1.3428062 1647.1875 145.2327873

Now let’s make some graphs.

archer.heatmap.out.2019 = graph.pitch.heatmap.out(archer.2019)
archer.heatmap.out.2019

archer.heatmap.type.2019 = graph.pitch.heatmap.type(archer.2019)
archer.heatmap.type.2019

archer.heatmap.velo.2019 = graph.pitch.heatmap.velo(archer.2019)
archer.heatmap.velo.2019

archer.spin.2019 = graph.pitch.spin(archer.2019)
archer.spin.2019

archer.velo.2019 = graph.pitch.velo(archer.2019)
archer.velo.2019

archer.xmove.2019 = graph.pitch.xmovement(archer.2019)
archer.xmove.2019

archer.ymove.2019 = graph.pitch.ymovement(archer.2019)
archer.ymove.2019

What’s also important is to determine how many of his pitches were barrelled (strong contact).

Now let’s take a look at his barrel probability for all his pitches.

Pitch Number of Barrels Barrel Probability
All 8 0.0172414
4-Seam Fastball 2 0.0666667
2-Seam Fastball 2 0.0952381
Cut Fastball 0 0
Slider 2 0.0540541
Curveball 0 NaN
Changeup 2 0.125

Let’s also take a look at his pitch frequencies.

Let’s view all his frequencies together in a tabular format.

Pitch Pitch Count Pitch Frequency
4-Seam Fastball 152 0.3275862
2-Seam Fastball 78 0.1681034
Cut Fastball 3 0.0064655
Slider 160 0.3448276
Curveball 8 0.0172414
Changeup 63 0.1357759

Jordan Lyles

lyles.data = scrape.data("2018-01-01", 543475)
## 2018-01-01 is not a date. Attempting to coerce...
## https://baseballsavant.mlb.com/statcast_search/csv?all=true&hfPT=&hfAB=&hfBBT=&hfPR=&hfZ=&stadium=&hfBBL=&hfNewZones=&hfGT=R%7CPO%7CS%7C&hfC&hfSea=2018%7C&hfSit=&hfOuts=&opponent=&pitcher_throws=&batter_stands=&hfSA=&player_type=pitcher&hfInfield=&team=&position=&hfOutfield=&hfRO=&home_road=&pitchers_lookup%5B%5D=543475&game_date_gt=2018-01-01&game_date_lt=2019-05-12&hfFlag=&hfPull=&metric_1=&hfInn=&min_pitches=0&min_results=0&group_by=name&sort_col=pitches&player_event_sort=h_launch_speed&sort_order=desc&min_abs=0&type=details
## These data are from BaseballSevant and are property of MLB Advanced Media, L.P. All rights reserved.
## Grabbing data, this may take a minute...
## URL read and payload acquired successfully.
lyles = filter.data(lyles.data)

Kyle Crick

crick.data = scrape.data("2018-01-01", 605195)
## 2018-01-01 is not a date. Attempting to coerce...
## https://baseballsavant.mlb.com/statcast_search/csv?all=true&hfPT=&hfAB=&hfBBT=&hfPR=&hfZ=&stadium=&hfBBL=&hfNewZones=&hfGT=R%7CPO%7CS%7C&hfC&hfSea=2018%7C&hfSit=&hfOuts=&opponent=&pitcher_throws=&batter_stands=&hfSA=&player_type=pitcher&hfInfield=&team=&position=&hfOutfield=&hfRO=&home_road=&pitchers_lookup%5B%5D=605195&game_date_gt=2018-01-01&game_date_lt=2019-05-12&hfFlag=&hfPull=&metric_1=&hfInn=&min_pitches=0&min_results=0&group_by=name&sort_col=pitches&player_event_sort=h_launch_speed&sort_order=desc&min_abs=0&type=details
## These data are from BaseballSevant and are property of MLB Advanced Media, L.P. All rights reserved.
## Grabbing data, this may take a minute...
## URL read and payload acquired successfully.
crick = filter.data(crick.data)